import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# from huggingface_hub import login
# login(token='<your_hf_token>')

model_name = '/data1/models/llama3'
tokenizer = AutoTokenizer.from_pretrained(model_name)
# torchscript=True makes the model return plain tuples instead of ModelOutput
# objects, which torch.jit.trace requires
model = AutoModelForCausalLM.from_pretrained(model_name, torchscript=True)

model.eval()  # inference mode: disables dropout

test_input = '你好，请问'  # Chinese prompt: "Hello, may I ask"
inputs = tokenizer(test_input, return_tensors='pt', max_length=128, truncation=True)
with torch.no_grad():
    outputs = model(**inputs)  # tuple output when torchscript=True; outputs[0] is the logits

# torch.jit.script generally fails on Hugging Face causal LMs because of their
# dynamic Python control flow; the supported export path is tracing with
# example inputs (this follows the Transformers TorchScript recipe and may
# depend on the installed transformers version)
traced_model = torch.jit.trace(model, (inputs['input_ids'], inputs['attention_mask']))
traced_model.save('llama.pt')
loaded_model = torch.jit.load('llama.pt')
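
# Optional sanity check (a minimal sketch): run the reloaded TorchScript module
# on the same example inputs and compare its logits with the eager model's.
# Assumes the traced module takes positional (input_ids, attention_mask) and
# returns a tuple whose first element is the logits, per the torchscript=True
# convention.
with torch.no_grad():
    ts_logits = loaded_model(inputs['input_ids'], inputs['attention_mask'])[0]
print('logits match:', torch.allclose(outputs[0], ts_logits, atol=1e-4))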


